package com.github.purplepapa.Storm_Simple_Crawler;
import java.util.UUID;
import storm.kafka.BrokerHosts;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.utils.Utils;
/**
 * Command-line entry point that wires the crawler topology and launches it.
 *
 * <p>Two independent chains are registered on the same {@link TopologyBuilder}:
 * <ol>
 *   <li>a {@code RandomSentenceSpout} shuffle-grouped into a {@code ForwardToKafkaBolt}
 *       (presumably publishing its tuples to the {@code crawl} Kafka topic - confirm
 *       against that bolt's implementation);</li>
 *   <li>a {@link KafkaSpout} reading the {@code crawl} topic, shuffle-grouped into a
 *       {@code URLDeduplicatorBolt}.</li>
 * </ol>
 * The partition / fetch / parse stages are declared as ids but are currently disabled.
 */
public class CrawlerTopology {
    private static final String CRAWL_SPOUT_ID = "crawl-spout";
    private static final String URLDEDUP_BOLT_ID = "urldedup-bolt";
    // The three ids below are only referenced by the disabled stages in wireTopology().
    private static final String PARTITION_BOLT_ID = "partition-bolt";
    private static final String FETCH_BOLT_ID = "fetch-bolt";
    private static final String PARSE_BOLT_ID = "parse-bolt";
    private static final String TOPOLOGY_NAME = "crawl-topology";

    private static final String SENTENCE_SPOUT_ID = "random-sentence";
    private static final String KAFKA_FORWARD_BOLT_ID = "forwardToKafka";
    private static final int KAFKA_FORWARD_PARALLELISM = 2;

    private static final String TOPIC_NAME = "crawl";
    // NOTE(review): endpoints are hard-coded; consider reading them from args or config.
    private static final String KAFKA_BROKER_ADDRESS = "54.245.107.71:9092";
    private static final String ZOOKEEPER_ADDRESS = "54.245.107.71:2181";
    private static final String KAFKA_SERIALIZER_CLASS = "kafka.serializer.StringEncoder";

    /** How long the in-process cluster is left running before the topology is killed. */
    private static final long LOCAL_RUN_MILLIS = 50000L;

    /**
     * Builds the topology and runs it.
     *
     * @param args when empty, the topology runs in an in-process {@link LocalCluster} under
     *             the name {@code crawl-topology} for a fixed period and is then torn down;
     *             otherwise {@code args[0]} is used as the topology name and the topology is
     *             handed to {@link StormSubmitter}
     * @throws Exception if topology submission or teardown fails
     */
    public static void main(String[] args) throws Exception {
        TopologyBuilder builder = new TopologyBuilder();
        System.out.println("in main:");
        wireTopology(builder);

        Config config = new Config();
        if (args.length == 0) {
            runLocally(builder, config);
        } else {
            StormSubmitter.submitTopology(args[0], config,
                    builder.createTopology());
        }
    }

    /** Registers every spout and bolt of the crawler on the given builder. */
    private static void wireTopology(TopologyBuilder builder) {
        // Random sentence spout --> ForwardToKafkaBolt
        builder.setSpout(SENTENCE_SPOUT_ID, new RandomSentenceSpout());
        builder.setBolt(
                KAFKA_FORWARD_BOLT_ID,
                new ForwardToKafkaBolt(KAFKA_BROKER_ADDRESS,
                        KAFKA_SERIALIZER_CLASS, TOPIC_NAME),
                KAFKA_FORWARD_PARALLELISM)
                .shuffleGrouping(SENTENCE_SPOUT_ID);

        // Kafka spout over the crawl topic; brokers are located through ZooKeeper.
        BrokerHosts hosts = new ZkHosts(ZOOKEEPER_ADDRESS);
        // A fresh random id is generated on every launch.
        // NOTE(review): storm-kafka appears to key stored consumer offsets by this id, so
        // progress would not carry over between runs - confirm this is intended.
        SpoutConfig spoutConfig = new SpoutConfig(hosts, TOPIC_NAME,
                "/" + TOPIC_NAME, UUID.randomUUID().toString());
        spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        builder.setSpout(CRAWL_SPOUT_ID, new KafkaSpout(spoutConfig));

        // Kafka crawl spout --> URLDeduplicatorBolt
        builder.setBolt(URLDEDUP_BOLT_ID, new URLDeduplicatorBolt())
                .shuffleGrouping(CRAWL_SPOUT_ID);

        // Disabled downstream stages, kept for reference. The first one uses Fields,
        // which is not among this file's visible imports.
        // // URLDeduplicatorBolt --> URLPartitionerBolt
        // builder.setBolt(PARTITION_BOLT_ID, new URLPartitionerBolt())
        //         .fieldsGrouping(URLDEDUP_BOLT_ID, new Fields("host"));
        // // URLPartitionerBolt --> SimpleFetcherBolt
        // builder.setBolt(FETCH_BOLT_ID, new SimpleFetcherBolt())
        //         .shuffleGrouping(PARTITION_BOLT_ID);
        // // SimpleFetcherBolt --> ParserBolt
        // builder.setBolt(PARSE_BOLT_ID, new ParserBolt())
        //         .shuffleGrouping(FETCH_BOLT_ID);
    }

    /**
     * Runs the topology in an in-process cluster for {@link #LOCAL_RUN_MILLIS} milliseconds,
     * then kills it. The cluster is shut down even when submission, the wait, or the kill
     * fails, so a failed run does not leave the local cluster behind.
     */
    private static void runLocally(TopologyBuilder builder, Config config) throws Exception {
        LocalCluster cluster = new LocalCluster();
        try {
            cluster.submitTopology(TOPOLOGY_NAME, config,
                    builder.createTopology());
            Utils.sleep(LOCAL_RUN_MILLIS);
            cluster.killTopology(TOPOLOGY_NAME);
        } finally {
            cluster.shutdown();
        }
    }
}